package com.apobates.forum.utils;

import java.io.File;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.Random;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * String searching helpers: random line sampling from a file, simple word
 * statistics for whitespace-separated (English) text, and several ways of
 * counting how often a string occurs inside another one.
 *
 * @author xiaofanku
 * @since 20200107
 */
public final class StringSeekUtils {
	private static final Logger logger = LoggerFactory.getLogger(StringSeekUtils.class);

	/** Maximum number of lines {@link #search(File)} reads after seeking. */
	private static final int SEARCH_LINE_COUNT = 3;

	/** Word separator used by the English-only helpers: one or more whitespace characters. */
	private static final Pattern WHITESPACE = Pattern.compile("\\s+");

	/** Utility class; never instantiated. */
	private StringSeekUtils() {
		throw new UnsupportedOperationException("不需要实化公共工具类");
	}

	/**
	 * Seeks to a random byte offset of the given dictionary file and reads up
	 * to three lines from there, concatenated without any separator.
	 * <p>
	 * The offset is obtained from {@code Commons.randomNumeric(0L, length)}
	 * (presumably a value inside that range; confirm against {@code Commons}).
	 * Because the offset is a raw byte position, the first line read is usually
	 * only the tail of a line and may start in the middle of a multi-byte
	 * UTF-8 character. When the end of the file is reached early, fewer than
	 * three lines (possibly none) are returned.
	 *
	 * @param directoryFile the UTF-8 encoded file to read from
	 * @return the lines read, joined without separator; empty when the seek
	 *         position is at the end of the file
	 * @throws IOException if the file cannot be opened or read
	 */
	public static String search(File directoryFile) throws IOException {
		StringBuilder sb = new StringBuilder();
		try (RandomAccessFile file = new RandomAccessFile(directoryFile, "r")) {
			long totalLen = file.length();
			long seekPos = Commons.randomNumeric(0L, totalLen);
			logger.info("seek position: {}", seekPos);
			file.seek(seekPos);
			for (int i = 0; i < SEARCH_LINE_COUNT; i++) {
				String raw = file.readLine();
				if (raw == null) {
					// End of file: readLine() signals it with null, nothing more to read.
					break;
				}
				// readLine() maps each byte to one char (Latin-1 style); recover the
				// original bytes and decode them as UTF-8.
				sb.append(new String(raw.getBytes(StandardCharsets.ISO_8859_1), StandardCharsets.UTF_8));
			}
		}
		return sb.toString();
	}

	/**
	 * Counts how often each word occurs in the text. English only: words are
	 * whatever is separated by whitespace, compared case-insensitively.
	 *
	 * @param sourceStr the text to analyse; must not be {@code null}
	 * @return a map from lower-cased word to its number of occurrences; empty
	 *         when the text contains no words
	 * @throws NullPointerException if {@code sourceStr} is {@code null}
	 */
	public static Map<String, Integer> wordFrequencyCount(String sourceStr) {
		Objects.requireNonNull(sourceStr);
		return words(sourceStr)
				.collect(Collectors.toMap(w -> w.toLowerCase(Locale.ROOT), w -> 1, Integer::sum, HashMap::new));
	}

	/**
	 * Picks a random word whose length is at least {@code minLength}. English
	 * only: words are whatever is separated by whitespace.
	 *
	 * @param sourceStr the text to pick from; must not be {@code null}
	 * @param minLength minimum length (inclusive) a word needs to be eligible
	 * @return the chosen word in lower case, or {@code null} when no word is
	 *         long enough
	 * @throws NullPointerException if {@code sourceStr} is {@code null}
	 */
	public static String walkWord(String sourceStr, int minLength) {
		Objects.requireNonNull(sourceStr);
		List<String> candidates = words(sourceStr)
				.filter(w -> w.length() >= minLength)
				.collect(Collectors.toList());
		if (candidates.isEmpty()) {
			return null;
		}
		logger.info("source rawdata: {}", String.join("|", candidates));
		int pick = new Random().nextInt(candidates.size());
		String data = candidates.get(pick).toLowerCase(Locale.ROOT);
		logger.info("current word: {}", data);
		return data;
	}

	/**
	 * Returns the leading run of ASCII letters of the argument; everything
	 * from the first non-letter character on is dropped. English only.
	 * <p>
	 * Note: letters that appear after the first non-letter are NOT kept, e.g.
	 * {@code "don't"} yields {@code "don"} and {@code "(word)"} yields
	 * {@code ""}.
	 *
	 * @param rndStr the raw word; must not be {@code null}
	 * @return the leading {@code [a-zA-Z]*} prefix of {@code rndStr}
	 * @throws NullPointerException if {@code rndStr} is {@code null}
	 */
	public static String clearWord(String rndStr) {
		logger.info("clear word: {}", rndStr);
		int end = 0;
		while (end < rndStr.length() && isAsciiLetter(rndStr.charAt(end))) {
			end++;
		}
		return rndStr.substring(0, end);
	}

	/**
	 * Counts the matches of a regular expression inside a text.
	 * {@code findWord} is compiled as a regular expression, so meta characters
	 * keep their regex meaning. Matches are found with repeated
	 * {@link Matcher#find()}, i.e. they do not overlap.
	 *
	 * @param content  the text to search in
	 * @param findWord the regular expression to look for; must not be {@code null}
	 * @return the number of matches, or {@code -1} when {@code Commons.isNotBlank}
	 *         rejects either argument
	 * @throws NullPointerException if {@code findWord} is {@code null}
	 * @throws java.util.regex.PatternSyntaxException if {@code findWord} is not
	 *         a valid regular expression
	 */
	public static int queryWordCountByExpReg(String content, String findWord) {
		Objects.requireNonNull(findWord);
		if (!hasSearchableInput(content, findWord)) {
			return -1;
		}
		Matcher matcher = Pattern.compile(findWord).matcher(content);
		int affect = 0;
		while (matcher.find()) {
			affect++;
		}
		return affect;
	}

	/**
	 * Counts the occurrences of a literal string inside a text. Every start
	 * position at which {@code findWord} matches is counted, so overlapping
	 * occurrences count separately ({@code "aa"} occurs twice in
	 * {@code "aaa"}).
	 *
	 * @param content  the text to search in
	 * @param findWord the literal string to look for; must not be {@code null}
	 * @return the number of occurrences, or {@code -1} when
	 *         {@code Commons.isNotBlank} rejects either argument
	 * @throws NullPointerException if {@code findWord} is {@code null}
	 */
	public static int queryWordCount(String content, String findWord) {
		Objects.requireNonNull(findWord);
		if (!hasSearchableInput(content, findWord)) {
			return -1;
		}
		if (logger.isInfoEnabled()) {
			logger.info("[QWC][1]ref code: {}", String.join(",", stringToUnicode(findWord)));
		}
		if (findWord.isEmpty()) {
			// Defensive: indexOf("") matches everywhere and would never advance past the end.
			return 0;
		}
		int count = 0;
		// Advance by one position after each hit so that patterns with repeated
		// characters and overlapping occurrences are counted correctly.
		for (int at = content.indexOf(findWord); at >= 0; at = content.indexOf(findWord, at + 1)) {
			logger.info("[QWC][5]loop string: {}", findWord);
			count++;
		}
		return count;
	}

	/**
	 * Converts every UTF-16 code unit of a string to its hexadecimal value.
	 *
	 * @param str the string to convert
	 * @return one lower-case hex string per {@code char}, without zero
	 *         padding (e.g. {@code 'a'} becomes {@code "61"})
	 */
	private static List<String> stringToUnicode(String str) {
		List<String> codes = new ArrayList<>(str.length());
		for (char c : str.toCharArray()) {
			codes.add(Integer.toHexString(c));
		}
		return codes;
	}

	/**
	 * Counts the occurrences of a literal string inside a text using the
	 * Boyer-Moore good-suffix search of {@link #searchPattern(String, String)}.
	 * Overlapping occurrences count separately.
	 *
	 * @param content  the text to search in
	 * @param findWord the literal string to look for; must not be {@code null}
	 * @return the number of occurrences, or {@code -1} when
	 *         {@code Commons.isNotBlank} rejects either argument
	 * @throws NullPointerException if {@code findWord} is {@code null}
	 */
	public static int queryWordCountByBM(String content, String findWord) {
		Objects.requireNonNull(findWord);
		if (!hasSearchableInput(content, findWord)) {
			return -1;
		}
		// Search the raw strings: matching on a concatenated hex encoding can hit
		// misaligned digit boundaries and report occurrences that do not exist.
		return searchPattern(content, findWord);
	}

	/**
	 * Counts the occurrences of {@code pattern} in {@code mainString} with the
	 * Boyer-Moore algorithm (good-suffix rule only). Overlapping occurrences
	 * count separately.
	 *
	 * @param mainString the text to search in; must not be {@code null}
	 * @param pattern    the literal string to look for; must not be {@code null}
	 * @return the number of positions at which {@code pattern} occurs;
	 *         {@code 0} for an empty pattern or a pattern longer than the text
	 * @throws NullPointerException if either argument is {@code null}
	 */
	public static int searchPattern(String mainString, String pattern) {
		Objects.requireNonNull(mainString, "mainString");
		Objects.requireNonNull(pattern, "pattern");
		int patLen = pattern.length();
		int strLen = mainString.length();
		if (patLen == 0 || patLen > strLen) {
			return 0;
		}
		// Java zero-initialises int arrays, which is the state the preprocessing expects.
		int[] borderArray = new int[patLen + 1];
		int[] shiftArray = new int[patLen + 1];
		fullSuffixMatch(shiftArray, borderArray, pattern);
		partialSuffixMatch(shiftArray, borderArray, pattern);

		int matches = 0;
		int shift = 0;
		while (shift <= strLen - patLen) {
			// Compare right to left; j ends at the mismatch position or -1 on a full match.
			int j = patLen - 1;
			while (j >= 0 && pattern.charAt(j) == mainString.charAt(shift + j)) {
				j--;
			}
			if (j < 0) {
				matches++;
				shift += shiftArray[0];
			} else {
				shift += shiftArray[j + 1];
			}
		}
		return matches;
	}

	/**
	 * Good-suffix preprocessing, case 1: computes the border positions of all
	 * pattern suffixes and records the shift for every position where the
	 * matched suffix re-occurs elsewhere in the pattern preceded by a
	 * different character.
	 *
	 * @param shiftArr  shift table of length {@code pattern.length() + 1}, all zero on entry
	 * @param borderArr border table of length {@code pattern.length() + 1}, filled by this method
	 * @param pattern   the search pattern
	 */
	private static void fullSuffixMatch(int[] shiftArr, int[] borderArr, String pattern) {
		int n = pattern.length();
		int i = n;
		int j = n + 1;
		borderArr[i] = j;

		while (i > 0) {
			// The border cannot be extended to the left: record the shift and
			// fall back to the next shorter border.
			while (j <= n && pattern.charAt(i - 1) != pattern.charAt(j - 1)) {
				if (shiftArr[j] == 0) {
					shiftArr[j] = j - i;
				}
				j = borderArr[j];
			}
			i--;
			j--;
			borderArr[i] = j;
		}
	}

	/**
	 * Good-suffix preprocessing, case 2: fills every shift entry that case 1
	 * left at zero with the widest border that still applies, so that only a
	 * prefix of the pattern has to line up with the matched suffix.
	 *
	 * @param shiftArr  shift table prepared by {@link #fullSuffixMatch}
	 * @param borderArr border table prepared by {@link #fullSuffixMatch}
	 * @param pattern   the search pattern
	 */
	private static void partialSuffixMatch(int[] shiftArr, int[] borderArr, String pattern) {
		int n = pattern.length();
		int j = borderArr[0];

		// The bound is inclusive: shiftArr[n] is the shift used when the very last
		// pattern character mismatches. Leaving it at zero stalls the search forever.
		for (int i = 0; i <= n; i++) {
			if (shiftArr[i] == 0) {
				shiftArr[i] = j;
			}
			if (i == j) {
				j = borderArr[j];
			}
		}
	}

	/** Splits text on whitespace and drops the empty tokens that leading whitespace or empty input produce. */
	private static Stream<String> words(String text) {
		return Arrays.stream(WHITESPACE.split(text)).filter(w -> !w.isEmpty());
	}

	/** Tells whether the character is one of {@code a-z} or {@code A-Z}. */
	private static boolean isAsciiLetter(char c) {
		return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
	}

	/** Shared precondition of the counting methods: both arguments must pass {@code Commons.isNotBlank}. */
	private static boolean hasSearchableInput(String content, String findWord) {
		return Commons.isNotBlank(content) && Commons.isNotBlank(findWord);
	}
}
